1 Example hpgltools usage with a real data set (fission)

This document aims to provide further examples of how to use hpgltools.

Note to self, the header has rmarkdown::pdf_document instead of html_document or html_vignette because it gets some bullcrap error ‘margins too large’…

1.1 Setting up

Here are the commands I invoke to get ready to play with new data, including everything required to install hpgltools, the software it uses, and the fission data.

library(hpgltools)
tt <- sm(library(fission))
tt <- data(fission)

1.2 Data import

All the work I do in Dr. El-Sayed’s lab makes some pretty hard assumptions about how data is stored. As a result, to use the fission data set I will do a little bit of shenanigans to match it to the expected format. Now that I have played a little with fission, I think its format is quite nice and am likely to have my experiment class instead be a SummarizedExperiment.

## Extract the sample metadata from the fission dataset.
## The public SummarizedExperiment accessors are used instead of reaching into
## the object's slots, because the internal slot layout is not a stable API.
meta <- as.data.frame(SummarizedExperiment::colData(fission))
## Make conditions (strain.minute) and batches (replicate)
meta$condition <- paste(meta$strain, meta$minute, sep=".")
meta$batch <- meta$replicate
meta$sample.id <- rownames(meta)
## Grab the count matrix; assay() returns it with its row and column names.
fission_data <- SummarizedExperiment::assay(fission, "counts")
## This will make an experiment superclass called 'expt' and it contains
## an ExpressionSet along with any arbitrary additional information one might want to include.
## Along the way it writes an Rdata file which is by default called 'expt.Rdata'
fission_expt <- create_expt(metadata=meta, count_dataframe=fission_data)
## Reading the sample metadata.
## The sample definitions comprises: 36, 7 rows, columns.
## Matched 7039 annotations and counts.
## Bringing together the count matrix and gene information.

2 Some simple differential expression analyses

Travis wisely imposes a limit on the amount of time for building vignettes. My tools by default will attempt all possible pairwise comparisons, which takes a long time. Therefore I am going to take a subset of the data and limit these comparisons to that.

## Keep only the two conditions named in the subset expression (wt.120 and
## wt.30) so that the pairwise comparisons below stay small.
fun_data <- subset_expt(fission_expt,
                        subset="condition=='wt.120'|condition=='wt.30'")
## There were 36, now there are 6 samples.
## NOTE(review): fun_norm is not referenced again in the visible part of this
## document; every pairwise call below is given fun_data instead.
fun_norm <- sm(normalize_expt(fun_data, batch="limma", norm="quant",
                              transform="log2", convert="cpm"))

2.1 Try using limma first

## Run the limma-based pairwise comparison on the subset.
limma_comparison <- sm(
  limma_pairwise(fun_data)
)

## List the contrasts that were performed, then summarize the only one.
limma_tables <- limma_comparison$all_tables
names(limma_tables)
## [1] "wt30_vs_wt120"
summary(limma_tables$wt30_vs_wt120)
##      logFC           AveExpr            t             P.Value      
##  Min.   :-4.278   Min.   :-4.58   Min.   :-88.48   Min.   :0.0000  
##  1st Qu.:-0.399   1st Qu.: 1.11   1st Qu.: -2.60   1st Qu.:0.0192  
##  Median :-0.020   Median : 3.97   Median : -0.13   Median :0.1240  
##  Mean   : 0.008   Mean   : 3.11   Mean   : -0.17   Mean   :0.2792  
##  3rd Qu.: 0.300   3rd Qu.: 5.44   3rd Qu.:  1.72   3rd Qu.:0.4653  
##  Max.   : 7.075   Max.   :18.59   Max.   : 62.44   Max.   :1.0000  
##    adj.P.Val            B        
##  Min.   :0.0170   Min.   :-8.29  
##  1st Qu.:0.0767   1st Qu.:-6.58  
##  Median :0.2479   Median :-5.50  
##  Mean   :0.3686   Mean   :-4.87  
##  3rd Qu.:0.6204   3rd Qu.:-3.50  
##  Max.   :1.0000   Max.   : 4.83
## Scatter plot of the wt30 and wt120 coefficients from the limma result.
## Despite the variable name, both samples sets here are wild-type time points.
scatter_wt_mut <- extract_coefficient_scatter(limma_comparison, type="limma",
                                              x="wt30", y="wt120")
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter

## NOTE(review): the warnings below report all 7039 rows removed as non-finite.
## Limits set on a ggplot2 scale discard out-of-range data before the stats are
## computed, whereas ggplot2::coord_cartesian(ylim=) only zooms -- confirm that
## dropping the data is what is intended here.
scatter_wt_mut$both_histogram$plot + ggplot2::scale_y_continuous(limits=c(0, 0.20))
## Warning: Removed 7039 rows containing non-finite values (stat_bin).
## Warning: Removed 7039 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 1 rows containing missing values (geom_vline).

## MA and volcano plots for the same limma result.
ma_wt_mut <- extract_de_plots(limma_comparison, type="limma")
ma_wt_mut$ma$plot

ma_wt_mut$volcano$plot

2.2 Then DESeq2

## Same comparison, this time performed with DESeq2.
deseq_comparison <- sm(
  deseq2_pairwise(fun_data)
)

## Summarize the single contrast table.
deseq_tables <- deseq_comparison$all_tables
summary(deseq_tables$wt30_vs_wt120)
##     baseMean           logFC            lfcSE            stat        
##  Min.   :      0   Min.   :-5.615   Min.   :0.000   Min.   :-20.800  
##  1st Qu.:     28   1st Qu.:-0.386   1st Qu.:0.168   1st Qu.: -1.176  
##  Median :    192   Median : 0.000   Median :0.222   Median :  0.000  
##  Mean   :   1703   Mean   : 0.020   Mean   :0.489   Mean   :  0.168  
##  3rd Qu.:    536   3rd Qu.: 0.343   3rd Qu.:0.412   3rd Qu.:  1.109  
##  Max.   :4924000   Max.   : 7.212   Max.   :4.072   Max.   : 30.370  
##     P.Value         adj.P.Val     
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0197   1st Qu.:0.0685  
##  Median :0.2503   Median :0.4676  
##  Mean   :0.3600   Mean   :0.4805  
##  3rd Qu.:0.6666   3rd Qu.:0.8732  
##  Max.   :1.0000   Max.   :1.0000
## Scatter plot of the wt30 and wt120 coefficients from the DESeq2 result.
scatter_wt_mut <- extract_coefficient_scatter(
  deseq_comparison,
  type = "deseq",
  x = "wt30",
  y = "wt120",
  gvis_filename = NULL
)
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30, r2, r3
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter

## MA and volcano plots for the same DESeq2 result.
plots_wt_mut <- extract_de_plots(
  deseq_comparison,
  type = "deseq"
)
plots_wt_mut$ma$plot

plots_wt_mut$volcano$plot

2.3 And EdgeR

## Pairwise comparison with edgeR, including batch in the model.
edger_comparison <- sm(
  edger_pairwise(fun_data, model_batch = TRUE)
)

## Build the MA/volcano plots and the coefficient scatter before printing them.
plots_wt_mut <- extract_de_plots(
  edger_comparison,
  type = "edger"
)
scatter_wt_mut <- extract_coefficient_scatter(
  edger_comparison,
  type = "edger",
  x = "wt30",
  y = "wt120",
  gvis_filename = NULL
)
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter

plots_wt_mut$ma$plot

plots_wt_mut$volcano$plot

2.4 My stupid basic comparison

## The 'basic' pairwise comparison, followed by a summary of its one contrast.
basic_comparison <- sm(
  basic_pairwise(fun_data)
)
basic_tables <- basic_comparison$all_tables
summary(basic_tables$wt30_vs_wt120)
##  numerator_median denominator_median numerator_var      denominator_var   
##  Min.   :-2.73    Min.   :-3.60      Length:5505        Length:5505       
##  1st Qu.: 3.31    1st Qu.: 3.31      Class :character   Class :character  
##  Median : 4.65    Median : 4.63      Mode  :character   Mode  :character  
##  Mean   : 4.71    Mean   : 4.71                                           
##  3rd Qu.: 5.94    3rd Qu.: 5.93                                           
##  Max.   :18.61    Max.   :18.61                                           
##        t               p                 logFC            adjp          
##  Min.   :-49.10   Length:5505        Min.   :-4.263   Length:5505       
##  1st Qu.: -1.53   Class :character   1st Qu.:-0.406   Class :character  
##  Median :  0.39   Mode  :character   Median :-0.070   Mode  :character  
##  Mean   :  0.16                      Mean   : 0.008                     
##  3rd Qu.:  2.10                      3rd Qu.: 0.297                     
##  Max.   : 50.21                      Max.   : 7.485
## Scatter plot of the wt30 and wt120 coefficients from the basic result.
scatter_wt_mut <- extract_coefficient_scatter(
  basic_comparison,
  type = "basic",
  x = "wt30",
  y = "wt120",
  gvis_filename = NULL
)
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter

## MA and volcano plots for the same basic result.
plots_wt_mut <- extract_de_plots(
  basic_comparison,
  type = "basic"
)
plots_wt_mut$ma$plot

plots_wt_mut$volcano$plot

2.5 Combine them all

## Run every method in one call, then merge the per-method tables.
all_comparisons <- sm(
  all_pairwise(fun_data, model_batch = TRUE)
)
all_combined <- sm(
  combine_de_tables(all_comparisons, excel = FALSE)
)
## Peek at the first combined table.
first_combined <- all_combined$data[[1]]
head(first_combined)
##              limma_logfc limma_adjp deseq_logfc deseq_adjp edger_logfc
## SPAC1002.01     -0.99860    0.16930    -1.08000    0.36640    -1.05900
## SPAC1002.02      0.03778    0.99460    -0.01485    0.98160    -0.02342
## SPAC1002.03c    -0.33910    0.02432    -0.22760    0.23270    -0.23630
## SPAC1002.04c     0.31760    0.33060     0.33550    0.32470     0.32580
## SPAC1002.05c     0.75440    0.08050     0.74810    0.01187     0.74120
## SPAC1002.06c     0.69490    0.68500     0.50550    1.00000     0.69240
##              edger_adjp limma_ave  limma_t limma_b limma_p deseq_basemean
## SPAC1002.01   0.2201000   -0.1955  -2.8320 -4.0790 0.07147         11.150
## SPAC1002.02   1.0000000    2.8470   0.1354 -7.4050 0.90140         87.420
## SPAC1002.03c  0.1598000    7.0770 -12.5400 -0.6495 0.00151       1621.000
## SPAC1002.04c  0.2151000    4.1960   1.7300 -6.5590 0.18830        222.200
## SPAC1002.05c  0.0009489    3.9020   4.6960 -3.7270 0.02118        187.200
## SPAC1002.06c  0.7328000   -1.9060   0.7146 -6.1430 0.52970          4.176
##              deseq_lfcse deseq_stat  deseq_p edger_logcpm  edger_lr
## SPAC1002.01       0.8209   -1.31600 0.188200      0.06691  2.745000
## SPAC1002.02       0.3316   -0.04479 0.964300      2.89400  0.007429
## SPAC1002.03c      0.1387   -1.64100 0.100800      7.09500  3.399000
## SPAC1002.04c      0.2382    1.40800 0.159000      4.24700  2.794000
## SPAC1002.05c      0.2464    3.03700 0.002393      3.99900 14.270000
## SPAC1002.06c      1.4310    0.35330 0.723800     -0.89280  0.370800
##               edger_p basic_nummed basic_denmed basic_numvar basic_denvar
## SPAC1002.01  0.097570        0.000        0.000            0            0
## SPAC1002.02  0.931300        3.100        2.774    3.603e-01    2.890e-02
## SPAC1002.03c 0.065220        6.909        7.248    2.955e-03    1.016e-03
## SPAC1002.04c 0.094620        4.407        4.195    5.418e-02    1.441e-01
## SPAC1002.05c 0.000158        4.272        3.625    1.293e-01    5.645e-02
## SPAC1002.06c 0.542600        0.000        0.000            0            0
##              basic_logfc  basic_t   basic_p basic_adjp limma_adjp_fdr
## SPAC1002.01       0.0000  0.00000         0          0      1.693e-01
## SPAC1002.02       0.3260 -0.01124 9.919e-01  9.963e-01      9.945e-01
## SPAC1002.03c     -0.3390  9.25600 1.969e-03  3.718e-02      2.432e-02
## SPAC1002.04c      0.2125 -1.26900 2.862e-01  4.542e-01      3.305e-01
## SPAC1002.05c      0.6472 -2.95900 4.975e-02  1.630e-01      8.050e-02
## SPAC1002.06c      0.0000  0.00000         0          0      6.850e-01
##              deseq_adjp_fdr edger_adjp_fdr basic_adjp_fdr   lfc_meta
## SPAC1002.01       4.186e-01      2.201e-01      0.000e+00 -1.0520000
## SPAC1002.02       1.000e+00      1.000e+00      9.954e-01  0.0007106
## SPAC1002.03c      2.682e-01      1.598e-01      7.607e-03 -0.2681000
## SPAC1002.04c      3.722e-01      2.151e-01      4.027e-01  0.3262000
## SPAC1002.05c      1.393e-02      9.489e-04      1.090e-01  0.7512000
## SPAC1002.06c      9.279e-01      7.328e-01      0.000e+00  0.6331000
##                lfc_var lfc_varbymed    p_meta     p_var
## SPAC1002.01  7.689e-04   -7.305e-04 1.191e-01 3.753e-03
## SPAC1002.02  3.191e-03    4.491e+00 9.323e-01 9.899e-04
## SPAC1002.03c 2.166e-03   -8.081e-03 5.584e-02 2.531e-03
## SPAC1002.04c 1.943e-04    5.956e-04 1.473e-01 2.297e-03
## SPAC1002.05c 1.320e-03    1.757e-03 7.910e-03 1.333e-04
## SPAC1002.06c 1.516e-02    2.394e-02 5.987e-01 1.178e-02
## Extract the significant genes and show the first limma 'ups' table.
sig_genes <- sm(
  extract_significant_genes(all_combined, excel = FALSE)
)
limma_ups <- sig_genes$limma$ups
head(limma_ups[[1]])
##               limma_logfc limma_adjp deseq_logfc deseq_adjp edger_logfc
## SPBC2F12.09c        7.075    0.01847       7.212  5.259e-66       7.170
## SPAC22A12.17c       5.609    0.02447       5.855  3.969e-19       5.822
## SPAPB1A11.02        5.606    0.01696       6.739  1.894e-06       6.483
## SPCPB16A4.07        5.576    0.01696       5.693 8.168e-199       5.684
## SPNCRNA.1611        5.410    0.01696       5.612  5.051e-16       5.529
## SPBC660.05          5.229    0.02795       5.403  3.101e-14       5.310
##               edger_adjp limma_ave limma_t limma_b   limma_p
## SPBC2F12.09c  1.264e-180    2.5920   19.36  0.8017 0.0004519
## SPAC22A12.17c  3.155e-57    6.4820   12.49 -0.3953 0.0015260
## SPAPB1A11.02   1.257e-14   -1.1920   27.66  0.2698 0.0001667
## SPCPB16A4.07  4.519e-138    6.5360   28.74  2.2890 0.0001498
## SPNCRNA.1611   1.789e-33    0.5594   39.34  1.0980 0.0000622
## SPBC660.05     9.868e-74    3.6400   10.56 -0.5550 0.0024270
##               deseq_basemean deseq_lfcse deseq_stat    deseq_p
## SPBC2F12.09c          443.50      0.4123     17.490  1.667e-68
## SPAC22A12.17c        4289.00      0.6255      9.360  7.945e-21
## SPAPB1A11.02           21.20      1.2810      5.259  1.447e-07
## SPCPB16A4.07         4157.00      0.1875     30.370 1.363e-202
## SPNCRNA.1611           58.57      0.6571      8.541  1.331e-17
## SPBC660.05            523.80      0.6724      8.035  9.365e-16
##               edger_logcpm edger_lr    edger_p basic_nummed basic_denmed
## SPBC2F12.09c        5.2210   839.00 1.796e-184        6.250       -1.235
## SPAC22A12.17c       8.4930   264.90  1.479e-59        9.396        4.087
## SPAPB1A11.02        0.9166    65.83  4.910e-16        1.491       -3.604
## SPCPB16A4.07        8.4490   641.90 1.284e-141        9.416        3.641
## SPNCRNA.1611        2.3170   154.00  2.363e-35        3.380       -1.604
## SPBC660.05          5.4560   341.60  2.804e-76        6.156        1.381
##               basic_numvar basic_denvar basic_logfc basic_t   basic_p
## SPBC2F12.09c     2.211e-02    5.043e-01       7.485 -16.760 2.432e-03
## SPAC22A12.17c    2.236e-02    9.694e-01       5.309 -10.080 8.325e-03
## SPAPB1A11.02     6.869e-01    4.981e-01       5.095  -7.708 1.687e-03
## SPCPB16A4.07     2.022e-02    2.894e-01       5.776 -17.480 1.780e-03
## SPNCRNA.1611     1.174e-01    1.141e-01       4.984 -18.270 5.286e-05
## SPBC660.05       2.068e-01    5.143e-01       4.775 -10.840 9.581e-04
##               basic_adjp limma_adjp_fdr deseq_adjp_fdr edger_adjp_fdr
## SPBC2F12.09c   3.928e-02      1.847e-02      6.176e-66     1.264e-180
## SPAC22A12.17c  6.437e-02      2.447e-02      4.660e-19      3.155e-57
## SPAPB1A11.02   3.452e-02      1.696e-02      2.224e-06      1.257e-14
## SPCPB16A4.07   3.524e-02      1.696e-02     9.594e-199     4.519e-138
## SPNCRNA.1611   1.455e-02      1.696e-02      5.930e-16      1.789e-33
## SPBC660.05     2.791e-02      2.795e-02      3.642e-14      9.869e-74
##               basic_adjp_fdr lfc_meta   lfc_var lfc_varbymed    p_meta
## SPBC2F12.09c       9.147e-03    7.152 0.000e+00    0.000e+00 1.506e-04
## SPAC22A12.17c      2.609e-02    5.916 1.123e-01    1.898e-02 5.087e-04
## SPAPB1A11.02       6.586e-03    6.137 5.880e-02    9.581e-03 5.561e-05
## SPCPB16A4.07       6.915e-03    5.632 2.810e-02    4.989e-03 4.993e-05
## SPNCRNA.1611       2.394e-04    5.521 2.657e-02    4.813e-03 2.073e-05
## SPBC660.05         3.914e-03    5.347 2.521e-02    4.714e-03 8.090e-04
##                   p_var
## SPBC2F12.09c  6.807e-08
## SPAC22A12.17c 7.762e-07
## SPAPB1A11.02  9.255e-09
## SPCPB16A4.07  7.480e-09
## SPNCRNA.1611  1.290e-09
## SPBC660.05    1.963e-06
## Agreement between each pair of methods for this contrast (the row names
## presumably abbreviate limma, edger, deseq, and basic -- TODO confirm).
## In the table below 'le' is the lowest value and 'ed' the highest, so
## limma and edger agree the least while edger and deseq agree the most:
all_comparisons$comparison$comp
##    wt30_vs_wt120
## le        0.9601
## ld        0.9808
## ed        0.9814
## lb        0.9779
## eb        0.9782
## db        0.9777
## And here we can look at the set of 'significant' genes according to various tools:
## This is the same call that produced sig_genes earlier, but it is not wrapped
## in sm(), so its progress messages are printed below.
## NOTE(review): yeast_sig is not referenced again in the visible part of this document.
yeast_sig <- extract_significant_genes(all_combined, excel=FALSE)
## Writing excel data for wt30_vs_wt120: 1/4.
## After (adj)p filter, the up genes table has 633 genes.
## After (adj)p filter, the down genes table has 629 genes.
## After fold change filter, the up genes table has 325 genes.
## After fold change filter, the down genes table has 201 genes.
## Writing excel data for wt30_vs_wt120: 2/4.
## After (adj)p filter, the up genes table has 1107 genes.
## After (adj)p filter, the down genes table has 1052 genes.
## After fold change filter, the up genes table has 447 genes.
## After fold change filter, the down genes table has 278 genes.
## Writing excel data for wt30_vs_wt120: 3/4.
## After (adj)p filter, the up genes table has 904 genes.
## After (adj)p filter, the down genes table has 733 genes.
## After fold change filter, the up genes table has 403 genes.
## After fold change filter, the down genes table has 219 genes.
## Writing excel data for wt30_vs_wt120: 4/4.
## After (adj)p filter, the up genes table has 277 genes.
## After (adj)p filter, the down genes table has 237 genes.
## After fold change filter, the up genes table has 185 genes.
## After fold change filter, the down genes table has 105 genes.
## Bar plots of the significant gene counts, printed one method at a time.
yeast_barplots <- sm(
  significant_barplots(combined = all_combined)
)
yeast_barplots$limma

yeast_barplots$edger

yeast_barplots$deseq

2.5.1 Setting up

Since I didn’t acquire this data in a ‘normal’ way, I am going to post-generate a gff file which may be used by clusterprofiler, topgo, and gostats.

Therefore, I am going to make use of TxDb to make the requisite gff file.

## Pull the per-contrast limma tables back out of the earlier result.
limma_results <- limma_comparison$all_tables
## The set of comparisons performed
names(limma_results)
## [1] "wt30_vs_wt120"
## Held in limma_table rather than 'table' so that base::table() is not masked.
limma_table <- limma_results$wt30_vs_wt120
dim(limma_table)
## [1] 7039    6
gene_names <- rownames(limma_table)

## Split the genes into up and down sets using the raw P.Value column.
updown_genes <- get_sig_genes(
  limma_table,
  p = 0.05,
  lfc = 0.4,
  p_column = "P.Value"
)
## After (adj)p filter, the up genes table has 1190 genes.
## After (adj)p filter, the down genes table has 1424 genes.
## After fold change filter, the up genes table has 962 genes.
## After fold change filter, the down genes table has 1069 genes.
## Make sure the annotation packages used below are available.
tt <- please_install("GenomicFeatures")
tt <- please_install("biomaRt")
## Ask the fungal Ensembl host which marts it offers.
available_marts <- biomaRt::listMarts(host="fungi.ensembl.org")
available_marts
##            biomart                     version
## 1       fungi_mart      Ensembl Fungi Genes 39
## 2 fungi_variations Ensembl Fungi Variations 39
ensembl_mart <- biomaRt::useMart("fungi_mart", host="fungi.ensembl.org")
available_datasets <- biomaRt::listDatasets(ensembl_mart)
## Find the dataset whose description mentions pombe and select it.
pombe_hit <- grep(pattern="pombe", x=available_datasets[["description"]])
pombe_name <- available_datasets[pombe_hit, "dataset"]
pombe_mart <- biomaRt::useDataset(pombe_name, mart=ensembl_mart)

## Download transcript ID / GO ID pairs and rename the columns to ID and GO.
## NOTE(review): values= is supplied without a filters= argument, so it
## presumably has no effect and the whole table is returned -- confirm.
pombe_goids <- biomaRt::getBM(attributes=c("pombase_transcript", "go_id"),
                              values=gene_names, mart=pombe_mart)
colnames(pombe_goids) <- c("ID", "GO")

2.5.2 Setting up with hpgltools

The above worked: it provided a table of IDs and ontologies. It was, however, a bit fraught. Here is another way.

## In theory, the above should work with a single function call:
pombe_goids_simple <- load_biomart_go(
  species = "spombe",
  overwrite = TRUE,
  dl_rows = c("pombase_transcript", "go_id"),
  host = "fungi.ensembl.org"
)
## Unable to perform useMart, perhaps the host/mart is incorrect: fungi.ensembl.org ENSEMBL_MART_ENSEMBL.
## The available marts are:
## fungi_martfungi_variations
## Trying the first one.
## Unable to perform useDataset, perhaps the given dataset is incorrect: spombe_gene_ensembl.
## Trying instead to use the dataset: spombe_eg_gene
## That seems to have worked, extracting the resulting annotations.
## Finished downloading ensembl go annotations, saving to spombe_go_annotations.rda.
## Saving ontologies to spombe_go_annotations.rda.
## Finished save().
head(pombe_goids_simple)
##               ID         GO
## 1    SPRRNA.50.1           
## 2 SPNCRNA.1095.1           
## 3   SPAC212.11.1 GO:0000784
## 4   SPAC212.11.1 GO:0005634
## 5   SPAC212.11.1 GO:0000166
## 6   SPAC212.11.1 GO:0005524
head(pombe_goids)
##               ID         GO
## 1    SPRRNA.50.1           
## 2 SPNCRNA.1095.1           
## 3   SPAC212.11.1 GO:0000784
## 4   SPAC212.11.1 GO:0005634
## 5   SPAC212.11.1 GO:0000166
## 6   SPAC212.11.1 GO:0005524
## This used to work, but does so no longer and I do not know why.
## NOTE(review): the commented call asks for biomart="fungal_mart", while the
## mart listing earlier in this document shows "fungi_mart"; the name mismatch
## may be the reason -- confirm.
## pombe <- sm(GenomicFeatures::makeTxDbFromBiomart(biomart="fungal_mart",
##                                                  dataset="spombe_eg_gene",
##                                                  host="fungi.ensembl.org"))

## I bet I can get all this information from ensembl now.
## This was found at the bottom of: https://www.biostars.org/p/232005/
link <- "ftp://ftp.ensemblgenomes.org/pub/release-34/fungi/gff3/schizosaccharomyces_pombe/Schizosaccharomyces_pombe.ASM294v2.34.gff3.gz"
## Build a TxDb directly from the gff3 file at that URL.
pombe <- GenomicFeatures::makeTxDbFromGFF(link, format="gff3", organism="Schizosaccharomyces pombe",
                                          taxonomyId=4896)
## Import genomic features from the file as a GRanges object ...
## OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ... OK
## Keep the group name and width columns as an ID/width table; it is passed
## as length_db to simple_goseq() later.
## Note that this variable masks base::lengths() by name.
pombe_transcripts <- as.data.frame(GenomicFeatures::transcriptsBy(pombe))
lengths <- pombe_transcripts[, c("group_name","width")]
colnames(lengths) <- c("ID","width")
## Something useful I didn't notice before:
## makeTranscriptDbFromGFF()  ## From GenomicFeatures, much like my own gff2df()
gff_from_txdb <- GenomicFeatures::asGFF(pombe)
## why is GeneID: getting prefixed to the IDs!?
gff_from_txdb$ID <- gsub(x=gff_from_txdb$ID, pattern="GeneID:", replacement="")
## Write the gff to pombe.gff; that file is given to simple_gostats() later.
written_gff <- rtracklayer::export.gff3(gff_from_txdb, con="pombe.gff")

2.6 GOSeq test

summary(updown_genes)
##            Length Class      Mode
## up_genes   6      data.frame list
## down_genes 6      data.frame list
## Test the down-regulated genes first.
test_genes <- updown_genes$down_genes
## The GO table is keyed by transcript IDs ending in ".1", so append the same
## suffix to the gene names and to the length table IDs.
rownames(test_genes) <- paste0(rownames(test_genes), ".1")
lengths$ID <- paste0(lengths$ID, ".1")
goseq_result <- sm(
  simple_goseq(
    sig_genes = test_genes,
    go_db = pombe_goids,
    length_db = lengths
  )
)

head(goseq_result$alldata)
##        category over_represented_pvalue under_represented_pvalue
## 338  GO:0005634               5.646e-44                        1
## 347  GO:0005730               8.262e-34                        1
## 1208 GO:0042254               1.198e-29                        1
## 126  GO:0003674               1.972e-27                        1
## 349  GO:0005737               1.157e-24                        1
## 380  GO:0005829               6.801e-21                        1
##      numDEInCat numInCat                term ontology    qvalue
## 338         100      579             nucleus       CC 9.282e-41
## 347          35       61           nucleolus       CC 6.792e-31
## 1208         26       36 ribosome biogenesis       BP 6.567e-27
## 126          76      529  molecular_function       MF 8.106e-25
## 349          77      574           cytoplasm       CC 3.804e-22
## 380          70      550             cytosol       CC 1.863e-18
goseq_result$pvalue_plots$mfp_plot

## Repeat the goseq test with the up-regulated genes.
test_genes <- updown_genes$up_genes
## Append the ".1" suffix so the names match the transcript IDs in the GO table.
rownames(test_genes) <- paste0(rownames(test_genes), ".1")
goseq_result <- sm(
  simple_goseq(
    sig_genes = test_genes,
    go_db = pombe_goids,
    length_db = lengths
  )
)

head(goseq_result$alldata)
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 640 GO:0008150               1.540e-55                        1        127
## 380 GO:0005829               1.587e-50                        1        124
## 349 GO:0005737               7.741e-50                        1        126
## 338 GO:0005634               1.380e-44                        1        120
## 126 GO:0003674               3.390e-44                        1        113
## 854 GO:0016020               2.814e-39                        1         96
##     numInCat               term ontology    qvalue
## 640      544 biological_process       BP 2.532e-52
## 380      550            cytosol       CC 1.304e-47
## 349      574          cytoplasm       CC 4.242e-47
## 338      579            nucleus       CC 5.674e-42
## 126      529 molecular_function       MF 1.114e-41
## 854      411           membrane       CC 7.712e-37
goseq_result$pvalue_plots$bpp_plot

2.7 ClusterProfiler test

clusterProfiler really prefers an orgdb instance to use, which is probably smart, as they are pretty nice. Sadly, there is no pre-defined orgdb for pombe…

## holy crap makeOrgPackageFromNCBI is slow, no slower than some of mine, so who am I to complain.
orgdb <- AnnotationForge::makeOrgPackageFromNCBI(version="0.1", author="atb <abelew@gmail.com>",
                                                 maintainer="atb <abelew@gmail.com>", tax_id="4896",
                                                 genus="Schizosaccharomyces", species="pombe")
## This created the directory 'org.Spombe.eg.db'
devtools::install_local("org.Spombe.eg.db")
library(org.Spombe.eg.db)
## Don't forget to remove the terminal .1 from the gene names...
## If you do forget this, it will fail for no easily visible reason until you remember
## this and get really mad at yourself.
## The dot is escaped so that only a literal ".1" suffix is removed.  An
## unescaped ".1$" matches any character followed by 1, so re-running the
## substitution would truncate IDs such as SPAC1002.01 to "SPAC1002.".
rownames(test_genes) <- gsub(pattern="\\.1$", replacement="", x=rownames(test_genes))
pombe_goids[["ID"]] <- gsub(pattern="\\.1$", replacement="", x=pombe_goids[["ID"]])
cp_result <- simple_clusterprofiler(sig_genes=test_genes, do_david=FALSE, do_gsea=FALSE,
                                    de_table=all_combined$data[[1]],
                                    orgdb=org.Spombe.eg.db, orgdb_to="ALIAS")
cp_result[["pvalue_plots"]][["ego_all_mf"]]
## Yay bar plots!
## Get rid of those stupid terminal .1s.
## With the escaped pattern, repeating the substitution leaves already-stripped
## IDs alone unless they themselves end in a literal ".1".
rownames(test_genes) <- gsub(pattern="\\.1$", replacement="", x=rownames(test_genes))
pombe_goids[["ID"]] <- gsub(pattern="\\.1$", replacement="", x=pombe_goids[["ID"]])
tp_result <- sm(simple_topgo(sig_genes=test_genes, go_db=pombe_goids, pval_column="limma_adjp"))

tp_result[["pvalue_plots"]][["mfp_plot_over"]]

tp_result[["pvalue_plots"]][["bpp_plot_over"]]

## Get rid of those stupid terminal .1s.
rownames(test_genes) <- gsub(pattern="\\.1$", replacement="", x=rownames(test_genes))
pombe_goids[["ID"]] <- gsub(pattern="\\.1$", replacement="", x=pombe_goids[["ID"]])
## universe_merge is presumably the column of the final data frame used when
## merging (the original note was cut off here -- TODO confirm).
## gff_type is the field in the gff file providing the id, this may be redundant with
## universe merge, that is something to check on...
gst_result <- sm(simple_gostats(sig_genes=test_genes, go_db=pombe_goids, universe_merge="id",
                                gff_type="gene",
                                gff="pombe.gff", pval_column="limma_adjp"))
pander::pander(sessionInfo())

R version 3.5.1 (2018-07-02)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C

attached base packages: parallel, stats4, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: edgeR(v.3.22.2), variancePartition(v.1.10.0), ggplot2(v.2.2.1), fission(v.0.114.0), SummarizedExperiment(v.1.10.1), DelayedArray(v.0.6.0), BiocParallel(v.1.14.1), matrixStats(v.0.53.1), Biobase(v.2.40.0), GenomicRanges(v.1.32.3), GenomeInfoDb(v.1.16.0), IRanges(v.2.14.10), S4Vectors(v.0.18.2), BiocGenerics(v.0.26.0), foreach(v.1.4.4), Vennerable(v.3.1.0.9000), ruv(v.0.9.7) and hpgltools(v.2018.03)

loaded via a namespace (and not attached): snow(v.0.4-2), backports(v.1.1.2), Hmisc(v.4.1-1), plyr(v.1.8.4), lazyeval(v.0.2.1), splines(v.3.5.1), sva(v.3.28.0), digest(v.0.6.15), BiocInstaller(v.1.30.0), htmltools(v.0.3.6), GO.db(v.3.6.0), gdata(v.2.18.0), magrittr(v.1.5), checkmate(v.1.8.5), memoise(v.1.1.0), cluster(v.2.0.7-1), doParallel(v.1.0.11), openxlsx(v.4.1.0), limma(v.3.36.1), Biostrings(v.2.48.0), annotate(v.1.58.0), prettyunits(v.1.0.2), colorspace(v.1.3-2), blob(v.1.1.1), ggrepel(v.0.8.0), BiasedUrn(v.1.07), RCurl(v.1.95-4.10), graph(v.1.58.0), genefilter(v.1.62.0), lme4(v.1.1-17), survival(v.2.42-4), iterators(v.1.0.9), gtable(v.0.2.0), zlibbioc(v.1.26.0), XVector(v.0.20.0), DEoptimR(v.1.0-8), SparseM(v.1.77), scales(v.0.5.0), DESeq(v.1.32.0), DBI(v.1.0.0), Rcpp(v.0.12.17), genoPlotR(v.0.8.7), xtable(v.1.8-2), progress(v.1.1.2), htmlTable(v.1.12), foreign(v.0.8-70), bit(v.1.1-14), preprocessCore(v.1.42.0), Formula(v.1.2-3), htmlwidgets(v.1.2), httr(v.1.3.1), gplots(v.3.0.1), RColorBrewer(v.1.1-2), acepack(v.1.4.1), pkgconfig(v.2.0.1), XML(v.3.98-1.11), nnet(v.7.3-12), locfit(v.1.5-9.1), labeling(v.0.3), rlang(v.0.2.1), reshape2(v.1.4.3), AnnotationDbi(v.1.42.1), munsell(v.0.4.3), tools(v.3.5.1), RSQLite(v.2.1.1), ade4(v.1.7-11), devtools(v.1.13.5), evaluate(v.0.10.1), stringr(v.1.3.1), yaml(v.2.1.19), knitr(v.1.20), bit64(v.0.9-7), geneLenDataBase(v.1.16.0), zip(v.1.0.0), pander(v.0.6.1), robustbase(v.0.93-0), caTools(v.1.17.1), packrat(v.0.4.9-3), RBGL(v.1.56.0), nlme(v.3.1-137), biomaRt(v.2.36.1), compiler(v.3.5.1), pbkrtest(v.0.4-7), rstudioapi(v.0.7), curl(v.3.2), tibble(v.1.4.2), geneplotter(v.1.58.0), stringi(v.1.2.2), highr(v.0.6), GenomicFeatures(v.1.32.0), lattice(v.0.20-35), Matrix(v.1.2-14), nloptr(v.1.0.4), pillar(v.1.2.3), goseq(v.1.32.0), data.table(v.1.11.4), bitops(v.1.0-6), corpcor(v.1.6.9), qvalue(v.2.12.0), rtracklayer(v.1.40.3), colorRamps(v.2.3), R6(v.2.2.2), latticeExtra(v.0.6-28), directlabels(v.2018.05.22), topGO(v.2.32.0), 
KernSmooth(v.2.23-15), gridExtra(v.2.3), codetools(v.0.2-15), MASS(v.7.3-50), gtools(v.3.5.0), assertthat(v.0.2.0), DESeq2(v.1.20.0), rprojroot(v.1.3-2), withr(v.2.1.2), GenomicAlignments(v.1.16.0), Rsamtools(v.1.32.0), GenomeInfoDbData(v.1.1.0), mgcv(v.1.8-24), doSNOW(v.1.0.16), quadprog(v.1.5-5), grid(v.3.5.1), rpart(v.4.1-13), minqa(v.1.2.4), rmarkdown(v.1.9), Rtsne(v.0.13) and base64enc(v.0.1-3)

LS0tCnRpdGxlOiAiaHBnbHRvb2xzIERpZmZlcmVudGlhbCBFeHByZXNzaW9uIEFuYWx5c2VzIFVzaW5nIHRoZSBGaXNzaW9uIERhdGFzZXQiCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKdmlnbmV0dGU6ID4KICAlXFZpZ25ldHRlSW5kZXhFbnRyeXtjLTAzX2Zpc3Npb25fZGlmZmVyZW50aWFsX2V4cHJlc3Npb259CiAgJVxWaWduZXR0ZUVuZ2luZXtrbml0cjo6cm1hcmtkb3dufQogIFx1c2VwYWNrYWdlW3V0Zjhde2lucHV0ZW5jfQotLS0KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CiMjIFRoZXNlIGFyZSB0aGUgb3B0aW9ucyBJIHRlbmQgdG8gZmF2b3IKbGlicmFyeSgiaHBnbHRvb2xzIikKIyMgdHQgPC0gZGV2dG9vbHM6OmxvYWRfYWxsKCJ+L2hwZ2x0b29scyIpCmtuaXRyOjpvcHRzX2tuaXQkc2V0KHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2U9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgd2lkdGg9OTAsCiAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KGVycm9yPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICBmaWcud2lkdGg9OCwKICAgICAgICAgICAgICAgICAgICAgIGZpZy5oZWlnaHQ9OCwKICAgICAgICAgICAgICAgICAgICAgIGRwaT05NikKb2xkX29wdGlvbnMgPC0gb3B0aW9ucyhkaWdpdHM9NCwKICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKc2V0LnNlZWQoMSkKcm1kX2ZpbGUgPC0gImMtMDNfZmlzc2lvbl9kaWZmZXJlbnRpYWxfZXhwcmVzc2lvbi5SbWQiCmBgYAoKIyBFeGFtcGxlIGhwZ2x0b29sIHVzYWdlIHdpdGggYSByZWFsIGRhdGEgc2V0IChmaXNzaW9uKQoKVGhpcyBkb2N1bWVudCBhaW1zIHRvIHByb3ZpZGUgZnVydGhlciBleGFtcGxlcyBpbiBob3cgdG8gdXNlIHRoZSBocGdsdG9vbHMuCgpOb3RlIHRvIHNlbGYsIHRoZSBoZWFkZXIgaGFzIHJtYXJrZG93bjo6cGRmX2RvY3VtZW50IGluc3RlYWQgb2YgaHRtbF9kb2N1bWVudCBvciBodG1sX3ZpZ25ldHRlCmJlY2F1c2UgaXQgZ2V0
cyBzb21lIGJ1bGxjcmFwIGVycm9yICdtYXJnaW5zIHRvbyBsYXJnZScuLi4KCiMjIFNldHRpbmcgdXAKCkhlcmUgYXJlIHRoZSBjb21tYW5kcyBJIGludm9rZSB0byBnZXQgcmVhZHkgdG8gcGxheSB3aXRoIG5ldyBkYXRhLCBpbmNsdWRpbmcgZXZlcnl0aGluZwpyZXF1aXJlZCB0byBpbnN0YWxsIGhwZ2x0b29scywgdGhlIHNvZnR3YXJlIGl0IHVzZXMsIGFuZCB0aGUgZmlzc2lvbiBkYXRhLgoKYGBge3Igc2V0dXB9CmxpYnJhcnkoaHBnbHRvb2xzKQp0dCA8LSBzbShsaWJyYXJ5KGZpc3Npb24pKQp0dCA8LSBkYXRhKGZpc3Npb24pCmBgYAoKIyMgRGF0YSBpbXBvcnQKCkFsbCB0aGUgd29yayBJIGRvIGluIERyLiBFbC1TYXllZCdzIGxhYiBtYWtlcyBzb21lIHByZXR0eSBoYXJkCmFzc3VtcHRpb25zIGFib3V0IGhvdyBkYXRhIGlzIHN0b3JlZC4gIEFzIGEgcmVzdWx0LCB0byB1c2UgdGhlIGZpc3Npb24KZGF0YSBzZXQgSSB3aWxsIGRvIGEgbGl0dGxlIGJpdCBvZiBzaGVuYW5pZ2FucyB0byBtYXRjaCBpdCB0byB0aGUKZXhwZWN0ZWQgZm9ybWF0LiAgTm93IHRoYXQgSSBoYXZlIHBsYXllZCBhIGxpdHRsZSB3aXRoIGZpc3Npb24sIEkKdGhpbmsgaXRzIGZvcm1hdCBpcyBxdWl0ZSBuaWNlIGFuZCBhbSBsaWtlbHkgdG8gaGF2ZSBteSBleHBlcmltZW50CmNsYXNzIGluc3RlYWQgYmUgYSBTdW1tYXJpemVkRXhwZXJpbWVudC4KCmBgYHtyIGRhdGFfaW1wb3J0fQojIyBFeHRyYWN0IHRoZSBtZXRhIGRhdGEgZnJvbSB0aGUgZmlzc2lvbiBkYXRhc2V0Cm1ldGEgPC0gYXMuZGF0YS5mcmFtZShmaXNzaW9uQGNvbERhdGEpCiMjIE1ha2UgY29uZGl0aW9ucyBhbmQgYmF0Y2hlcwptZXRhJGNvbmRpdGlvbiA8LSBwYXN0ZShtZXRhJHN0cmFpbiwgbWV0YSRtaW51dGUsIHNlcD0iLiIpCm1ldGEkYmF0Y2ggPC0gbWV0YSRyZXBsaWNhdGUKbWV0YSRzYW1wbGUuaWQgPC0gcm93bmFtZXMobWV0YSkKIyMgR3JhYiB0aGUgY291bnQgZGF0YQpmaXNzaW9uX2RhdGEgPC0gZmlzc2lvbkBhc3NheXMkZGF0YSRjb3VudHMKIyMgVGhpcyB3aWxsIG1ha2UgYW4gZXhwZXJpbWVudCBzdXBlcmNsYXNzIGNhbGxlZCAnZXhwdCcgYW5kIGl0IGNvbnRhaW5zCiMjIGFuIEV4cHJlc3Npb25TZXQgYWxvbmcgd2l0aCBhbnkgYXJiaXRyYXJ5IGFkZGl0aW9uYWwgaW5mb3JtYXRpb24gb25lIG1pZ2h0IHdhbnQgdG8gaW5jbHVkZS4KIyMgQWxvbmcgdGhlIHdheSBpdCB3cml0ZXMgYSBSZGF0YSBmaWxlIHdoaWNoIGlzIGJ5IGRlZmF1bHQgY2FsbGVkICdleHB0LlJkYXRhJwpmaXNzaW9uX2V4cHQgPC0gY3JlYXRlX2V4cHQobWV0YWRhdGE9bWV0YSwgY291bnRfZGF0YWZyYW1lPWZpc3Npb25fZGF0YSkKYGBgCgojIFNvbWUgc2ltcGxlIGRpZmZlcmVudGlhbCBleHByZXNzaW9uIGFuYWx5c2VzCgpUcmF2aXMgd2lzZWx5IGltcG9zZXMgYSBsaW1pdCBvbiB0aGUgYW1vdW50IG9mIHRpbWUgZm9yIGJ1aWxkaW5nIHZpZ25ldHRlcy4KTXkgdG9vbHMgYnkgZGVmYXVsdCB3aWxs
IGF0dGVtcHQgYWxsIHBvc3NpYmxlIHBhaXJ3aXNlIGNvbXBhcmlzb25zLCB3aGljaCB0YWtlcyBhIGxvbmcgdGltZS4KVGhlcmVmb3JlIEkgYW0gZ29pbmcgdG8gdGFrZSBhIHN1YnNldCBvZiB0aGUgZGF0YSBhbmQgbGltaXQgdGhlc2UgY29tcGFyaXNvbnMgdG8gdGhhdC4KCmBgYHtyIHNpbXBsZV9zdWJzZXR9CmZ1bl9kYXRhIDwtIHN1YnNldF9leHB0KGZpc3Npb25fZXhwdCwKICAgICAgICAgICAgICAgICAgICAgICAgc3Vic2V0PSJjb25kaXRpb249PSd3dC4xMjAnfGNvbmRpdGlvbj09J3d0LjMwJyIpCmZ1bl9ub3JtIDwtIHNtKG5vcm1hbGl6ZV9leHB0KGZ1bl9kYXRhLCBiYXRjaD0ibGltbWEiLCBub3JtPSJxdWFudCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRyYW5zZm9ybT0ibG9nMiIsIGNvbnZlcnQ9ImNwbSIpKQpgYGAKCiMjIFRyeSB1c2luZyBsaW1tYSBmaXJzdAoKYGBge3Igc2ltcGxlX2xpbW1hfQpsaW1tYV9jb21wYXJpc29uIDwtIHNtKGxpbW1hX3BhaXJ3aXNlKGZ1bl9kYXRhKSkKbmFtZXMobGltbWFfY29tcGFyaXNvbiRhbGxfdGFibGVzKQpzdW1tYXJ5KGxpbW1hX2NvbXBhcmlzb24kYWxsX3RhYmxlcyR3dDMwX3ZzX3d0MTIwKQpzY2F0dGVyX3d0X211dCA8LSBleHRyYWN0X2NvZWZmaWNpZW50X3NjYXR0ZXIobGltbWFfY29tcGFyaXNvbiwgdHlwZT0ibGltbWEiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeD0id3QzMCIsIHk9Ind0MTIwIikKc2NhdHRlcl93dF9tdXQkc2NhdHRlcgpzY2F0dGVyX3d0X211dCRib3RoX2hpc3RvZ3JhbSRwbG90ICsgZ2dwbG90Mjo6c2NhbGVfeV9jb250aW51b3VzKGxpbWl0cz1jKDAsIDAuMjApKQptYV93dF9tdXQgPC0gZXh0cmFjdF9kZV9wbG90cyhsaW1tYV9jb21wYXJpc29uLCB0eXBlPSJsaW1tYSIpCm1hX3d0X211dCRtYSRwbG90Cm1hX3d0X211dCR2b2xjYW5vJHBsb3QKYGBgCgojIyBUaGVuIERFU2VxMgoKYGBge3Igc2ltcGxlX2Rlc2VxMn0KZGVzZXFfY29tcGFyaXNvbiA8LSBzbShkZXNlcTJfcGFpcndpc2UoZnVuX2RhdGEpKQpzdW1tYXJ5KGRlc2VxX2NvbXBhcmlzb24kYWxsX3RhYmxlcyR3dDMwX3ZzX3d0MTIwKQpzY2F0dGVyX3d0X211dCA8LSBleHRyYWN0X2NvZWZmaWNpZW50X3NjYXR0ZXIoZGVzZXFfY29tcGFyaXNvbiwgdHlwZT0iZGVzZXEiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeD0id3QzMCIsIHk9Ind0MTIwIiwgZ3Zpc19maWxlbmFtZT1OVUxMKQpzY2F0dGVyX3d0X211dCRzY2F0dGVyCnBsb3RzX3d0X211dCA8LSBleHRyYWN0X2RlX3Bsb3RzKGRlc2VxX2NvbXBhcmlzb24sIHR5cGU9ImRlc2VxIikKcGxvdHNfd3RfbXV0JG1hJHBsb3QKcGxvdHNfd3RfbXV0JHZvbGNhbm8kcGxvdApgYGAKCiMjIEFuZCBFZGdlUgoKYGBge3Igc2ltcGxlX2VkZ2VyfQplZGdlcl9jb21wYXJpc29uIDwtIHNtKGVkZ2VyX3BhaXJ3aXNlKGZ1bl9kYXRhLCBtb2RlbF9i
YXRjaD1UUlVFKSkKcGxvdHNfd3RfbXV0IDwtIGV4dHJhY3RfZGVfcGxvdHMoZWRnZXJfY29tcGFyaXNvbiwgdHlwZT0iZWRnZXIiKQpzY2F0dGVyX3d0X211dCA8LSBleHRyYWN0X2NvZWZmaWNpZW50X3NjYXR0ZXIoZWRnZXJfY29tcGFyaXNvbiwgdHlwZT0iZWRnZXIiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeD0id3QzMCIsIHk9Ind0MTIwIiwgZ3Zpc19maWxlbmFtZT1OVUxMKQpzY2F0dGVyX3d0X211dCRzY2F0dGVyCnBsb3RzX3d0X211dCRtYSRwbG90CnBsb3RzX3d0X211dCR2b2xjYW5vJHBsb3QKYGBgCgojIyBNeSBzdHVwaWQgYmFzaWMgY29tcGFyaXNvbgoKYGBge3Igc2ltcGxlX2Jhc2ljfQpiYXNpY19jb21wYXJpc29uIDwtIHNtKGJhc2ljX3BhaXJ3aXNlKGZ1bl9kYXRhKSkKc3VtbWFyeShiYXNpY19jb21wYXJpc29uJGFsbF90YWJsZXMkd3QzMF92c193dDEyMCkKc2NhdHRlcl93dF9tdXQgPC0gZXh0cmFjdF9jb2VmZmljaWVudF9zY2F0dGVyKGJhc2ljX2NvbXBhcmlzb24sIHR5cGU9ImJhc2ljIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHg9Ind0MzAiLCB5PSJ3dDEyMCIsIGd2aXNfZmlsZW5hbWU9TlVMTCkKc2NhdHRlcl93dF9tdXQkc2NhdHRlcgpwbG90c193dF9tdXQgPC0gZXh0cmFjdF9kZV9wbG90cyhiYXNpY19jb21wYXJpc29uLCB0eXBlPSJiYXNpYyIpCnBsb3RzX3d0X211dCRtYSRwbG90CnBsb3RzX3d0X211dCR2b2xjYW5vJHBsb3QKYGBgCgojIyBDb21iaW5lIHRoZW0gYWxsCgpgYGB7ciBzaW1wbGVfYWxsfQphbGxfY29tcGFyaXNvbnMgPC0gc20oYWxsX3BhaXJ3aXNlKGZ1bl9kYXRhLCBtb2RlbF9iYXRjaD1UUlVFKSkKYWxsX2NvbWJpbmVkIDwtIHNtKGNvbWJpbmVfZGVfdGFibGVzKGFsbF9jb21wYXJpc29ucywgZXhjZWw9RkFMU0UpKQpoZWFkKGFsbF9jb21iaW5lZCRkYXRhW1sxXV0pCnNpZ19nZW5lcyA8LSBzbShleHRyYWN0X3NpZ25pZmljYW50X2dlbmVzKGFsbF9jb21iaW5lZCwgZXhjZWw9RkFMU0UpKQpoZWFkKHNpZ19nZW5lcyRsaW1tYSR1cHNbWzFdXSkKCiMjIEhlcmUgd2Ugc2VlIHRoYXQgZWRnZXIgYW5kIGRlc2VxIGFncmVlIHRoZSBsZWFzdDoKYWxsX2NvbXBhcmlzb25zJGNvbXBhcmlzb24kY29tcAoKIyMgQW5kIGhlcmUgd2UgY2FuIGxvb2sgYXQgdGhlIHNldCBvZiAnc2lnbmlmaWNhbnQnIGdlbmVzIGFjY29yZGluZyB0byB2YXJpb3VzIHRvb2xzOgp5ZWFzdF9zaWcgPC0gZXh0cmFjdF9zaWduaWZpY2FudF9nZW5lcyhhbGxfY29tYmluZWQsIGV4Y2VsPUZBTFNFKQp5ZWFzdF9iYXJwbG90cyA8LSBzbShzaWduaWZpY2FudF9iYXJwbG90cyhjb21iaW5lZD1hbGxfY29tYmluZWQpKQp5ZWFzdF9iYXJwbG90cyRsaW1tYQp5ZWFzdF9iYXJwbG90cyRlZGdlcgp5ZWFzdF9iYXJwbG90cyRkZXNlcQpgYGAKCiMjIyBTZXR0aW5nIHVwCgpTaW5jZSBJIGRpZG4ndCBhY3F1aXJlIHRoaXMgZGF0YSBpbiBh
ICdub3JtYWwnIHdheSwgSSBhbSBnb2luZyB0byBwb3N0LWdlbmVyYXRlIGEKZ2ZmIGZpbGUgd2hpY2ggbWF5IGJlIHVzZWQgYnkgY2x1c3RlcnByb2ZpbGVyLCB0b3BnbywgYW5kIGdvc3RhdHMuCgpUaGVyZWZvcmUsIEkgYW0gZ29pbmcgdG8gbWFrZSB1c2Ugb2YgVHhEYiB0byBtYWtlIHRoZSByZXF1aXNpdGUgZ2ZmIGZpbGUuCgpgYGB7ciBvbnRvbG9neV9zZXR1cH0KbGltbWFfcmVzdWx0cyA8LSBsaW1tYV9jb21wYXJpc29uJGFsbF90YWJsZXMKIyMgVGhlIHNldCBvZiBjb21wYXJpc29ucyBwZXJmb3JtZWQKbmFtZXMobGltbWFfcmVzdWx0cykKdGFibGUgPC0gbGltbWFfcmVzdWx0cyR3dDMwX3ZzX3d0MTIwCmRpbSh0YWJsZSkKZ2VuZV9uYW1lcyA8LSByb3duYW1lcyh0YWJsZSkKCnVwZG93bl9nZW5lcyA8LSBnZXRfc2lnX2dlbmVzKHRhYmxlLCBwPTAuMDUsIGxmYz0wLjQsIHBfY29sdW1uPSJQLlZhbHVlIikKdHQgPC0gcGxlYXNlX2luc3RhbGwoIkdlbm9taWNGZWF0dXJlcyIpCnR0IDwtIHBsZWFzZV9pbnN0YWxsKCJiaW9tYVJ0IikKYXZhaWxhYmxlX21hcnRzIDwtIGJpb21hUnQ6Omxpc3RNYXJ0cyhob3N0PSJmdW5naS5lbnNlbWJsLm9yZyIpCmF2YWlsYWJsZV9tYXJ0cwplbnNlbWJsX21hcnQgPC0gYmlvbWFSdDo6dXNlTWFydCgiZnVuZ2lfbWFydCIsIGhvc3Q9ImZ1bmdpLmVuc2VtYmwub3JnIikKYXZhaWxhYmxlX2RhdGFzZXRzIDwtIGJpb21hUnQ6Omxpc3REYXRhc2V0cyhlbnNlbWJsX21hcnQpCnBvbWJlX2hpdCA8LSBncmVwKHBhdHRlcm49InBvbWJlIiwgeD1hdmFpbGFibGVfZGF0YXNldHNbWyJkZXNjcmlwdGlvbiJdXSkKcG9tYmVfbmFtZSA8LSBhdmFpbGFibGVfZGF0YXNldHNbcG9tYmVfaGl0LCAiZGF0YXNldCJdCnBvbWJlX21hcnQgPC0gYmlvbWFSdDo6dXNlRGF0YXNldChwb21iZV9uYW1lLCBtYXJ0PWVuc2VtYmxfbWFydCkKCnBvbWJlX2dvaWRzIDwtIGJpb21hUnQ6OmdldEJNKGF0dHJpYnV0ZXM9YygicG9tYmFzZV90cmFuc2NyaXB0IiwgImdvX2lkIiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHZhbHVlcz1nZW5lX25hbWVzLCBtYXJ0PXBvbWJlX21hcnQpCmNvbG5hbWVzKHBvbWJlX2dvaWRzKSA8LSBjKCJJRCIsICJHTyIpCmBgYAoKIyMjIFNldHRpbmcgdXAgd2l0aCBocGdsdG9vbHMKClRoZSBhYm92ZSB3b3JrZWQsIGl0IHByb3ZpZGVkIGEgdGFibGUgb2YgSUQgYW5kIG9udG9sb2d5LiAgSXQgd2FzIGhvd2V2ZXIgYSBiaXQgZnJhdWdodC4KSGVyZSBpcyBhbm90aGVyIHdheS4KCmBgYHtyIG9udG9sb2d5X3NldHVwX2hwZ2x0b29sc30KIyMgSW4gdGhlb3J5LCB0aGUgYWJvdmUgc2hvdWxkIHdvcmsgd2l0aCBhIHNpbmdsZSBmdW5jdGlvbiBjYWxsOgpwb21iZV9nb2lkc19zaW1wbGUgPC0gbG9hZF9iaW9tYXJ0X2dvKHNwZWNpZXM9InNwb21iZSIsIG92ZXJ3cml0ZT1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRsX3Jvd3M9YygicG9tYmFzZV90cmFu
c2NyaXB0IiwgImdvX2lkIiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaG9zdD0iZnVuZ2kuZW5zZW1ibC5vcmciKQpoZWFkKHBvbWJlX2dvaWRzX3NpbXBsZSkKaGVhZChwb21iZV9nb2lkcykKCiMjIFRoaXMgdXNlZCB0byB3b3JrLCBidXQgZG9lcyBzbyBubyBsb25nZXIgYW5kIEkgZG8gbm90IGtub3cgd2h5LgojIyBwb21iZSA8LSBzbShHZW5vbWljRmVhdHVyZXM6Om1ha2VUeERiRnJvbUJpb21hcnQoYmlvbWFydD0iZnVuZ2FsX21hcnQiLAojIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YXNldD0ic3BvbWJlX2VnX2dlbmUiLAojIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaG9zdD0iZnVuZ2kuZW5zZW1ibC5vcmciKSkKCiMjIEkgYmV0IEkgY2FuIGdldCBhbGwgdGhpcyBpbmZvcm1hdGlvbiBmcm9tIGVuc2VtYmwgbm93LgojIyBUaGlzIHdhcyBmb3VuZCBhdCB0aGUgYm90dG9tIG9mOiBodHRwczovL3d3dy5iaW9zdGFycy5vcmcvcC8yMzIwMDUvCmxpbmsgPC0gImZ0cDovL2Z0cC5lbnNlbWJsZ2Vub21lcy5vcmcvcHViL3JlbGVhc2UtMzQvZnVuZ2kvZ2ZmMy9zY2hpem9zYWNjaGFyb215Y2VzX3BvbWJlL1NjaGl6b3NhY2NoYXJvbXljZXNfcG9tYmUuQVNNMjk0djIuMzQuZ2ZmMy5neiIKcG9tYmUgPC0gR2Vub21pY0ZlYXR1cmVzOjptYWtlVHhEYkZyb21HRkYobGluaywgZm9ybWF0PSJnZmYzIiwgb3JnYW5pc209IlNjaGl6b3NhY2NoYXJvbXljZXMgcG9tYmUiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0YXhvbm9teUlkPTQ4OTYpCgpwb21iZV90cmFuc2NyaXB0cyA8LSBhcy5kYXRhLmZyYW1lKEdlbm9taWNGZWF0dXJlczo6dHJhbnNjcmlwdHNCeShwb21iZSkpCmxlbmd0aHMgPC0gcG9tYmVfdHJhbnNjcmlwdHNbLCBjKCJncm91cF9uYW1lIiwid2lkdGgiKV0KY29sbmFtZXMobGVuZ3RocykgPC0gYygiSUQiLCJ3aWR0aCIpCiMjIFNvbWV0aGluZyB1c2VmdWwgSSBkaWRuJ3Qgbm90aWNlIGJlZm9yZToKIyMgbWFrZVRyYW5zY3JpcHREYkZyb21HRkYoKSAgIyMgRnJvbSBHZW5vbWljRmVhdHVyZXMsIG11Y2ggbGlrZSBteSBvd24gZ2ZmMmRmKCkKZ2ZmX2Zyb21fdHhkYiA8LSBHZW5vbWljRmVhdHVyZXM6OmFzR0ZGKHBvbWJlKQojIyB3aHkgaXMgR2VuZUlEOiBnZXR0aW5nIHByZWZpeGVkIHRvIHRoZSBJRHMhPwpnZmZfZnJvbV90eGRiJElEIDwtIGdzdWIoeD1nZmZfZnJvbV90eGRiJElELCBwYXR0ZXJuPSJHZW5lSUQ6IiwgcmVwbGFjZW1lbnQ9IiIpCndyaXR0ZW5fZ2ZmIDwtIHJ0cmFja2xheWVyOjpleHBvcnQuZ2ZmMyhnZmZfZnJvbV90eGRiLCBjb249InBvbWJlLmdmZiIpCmBgYAoKIyMgR09TZXEgdGVzdAoKYGBge3IgdGVzdF9nb3NlcX0Kc3VtbWFyeSh1cGRvd25fZ2VuZXMpCnRlc3RfZ2VuZXMgPC0gdXBkb3duX2dlbmVzJGRvd25fZ2VuZXMKcm93bmFtZXMo
dGVzdF9nZW5lcykgPC0gcGFzdGUwKHJvd25hbWVzKHRlc3RfZ2VuZXMpLCAiLjEiKQpsZW5ndGhzJElEIDwtIHBhc3RlMChsZW5ndGhzJElELCAiLjEiKQpnb3NlcV9yZXN1bHQgPC0gc20oc2ltcGxlX2dvc2VxKHNpZ19nZW5lcz10ZXN0X2dlbmVzLCBnb19kYj1wb21iZV9nb2lkcywgbGVuZ3RoX2RiPWxlbmd0aHMpKQpoZWFkKGdvc2VxX3Jlc3VsdCRhbGxkYXRhKQpnb3NlcV9yZXN1bHQkcHZhbHVlX3Bsb3RzJG1mcF9wbG90Cgp0ZXN0X2dlbmVzIDwtIHVwZG93bl9nZW5lcyR1cF9nZW5lcwpyb3duYW1lcyh0ZXN0X2dlbmVzKSA8LSBwYXN0ZTAocm93bmFtZXModGVzdF9nZW5lcyksICIuMSIpCmdvc2VxX3Jlc3VsdCA8LSBzbShzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPXRlc3RfZ2VuZXMsIGdvX2RiPXBvbWJlX2dvaWRzLCBsZW5ndGhfZGI9bGVuZ3RocykpCmhlYWQoZ29zZXFfcmVzdWx0JGFsbGRhdGEpCmdvc2VxX3Jlc3VsdCRwdmFsdWVfcGxvdHMkYnBwX3Bsb3QKYGBgCgojIyBDbHVzdGVyUHJvZmlsZXIgdGVzdAoKY2x1c3RlclByb2ZpbGVyIHJlYWxseSBwcmVmZXJzIGFuIG9yZ2RiIGluc3RhbmNlIHRvIHVzZSwgd2hpY2ggaXMgcHJvYmFibHkgc21hcnQsIGFzIHRoZXkgYXJlCnByZXR0eSBuaWNlLiAgU2FkbHksIHRoZXJlIGlzIG5vIHByZS1kZWZpbmVkIG9yZ2RiIGZvciBwb21iZS4uLgoKYGBge3IgdGVzdF9jcCwgZXZhbD1GQUxTRX0KIyMgaG9seSBjcmFwIG1ha2VPcmdQYWNrYWdlRnJvbU5DQkkgaXMgc2xvdywgbm8gc2xvd2VyIHRoYW4gc29tZSBvZiBtaW5lLCBzbyB3aG8gYW0gSSB0byBjb21wbGFpbi4Kb3JnZGIgPC0gQW5ub3RhdGlvbkZvcmdlOjptYWtlT3JnUGFja2FnZUZyb21OQ0JJKHZlcnNpb249IjAuMSIsIGF1dGhvcj0iYXRiIDxhYmVsZXdAZ21haWwuY29tPiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtYWludGFpbmVyPSJhdGIgPGFiZWxld0BnbWFpbC5jb20+IiwgdGF4X2lkPSI0ODk2IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdlbnVzPSJTY2hpem9zYWNjaGFyb215Y2VzIiwgc3BlY2llcz0icG9tYmUiKQojIyBUaGlzIGNyZWF0ZWQgdGhlIGRpcmVjdG9yeSAnb3JnLnNwb21iZS5lZy5kYicKZGV2dG9vbHM6Omluc3RhbGxfbG9jYWwoIm9yZy5TcG9tYmUuZWcuZGIiKQpsaWJyYXJ5KG9yZy5TcG9tYmUuZWcuZGIpCiMjIERvbid0IGZvcmdldCB0byByZW1vdmUgdGhlIHRlcm1pbmFsIC4xIGZyb20gdGhlIGdlbmUgbmFtZXMuLi4KIyMgSWYgeW91IGRvIGZvcmdldCB0aGlzLCBpdCB3aWxsIGZhaWwgZm9yIG5vIGVhc2lseSB2aXNpYmxlIHJlYXNvbiB1bnRpbCB5b3UgcmVtZW1iZXIKIyMgdGhpcyBhbmQgZ2V0IHJlYWxseSBtYWQgYXQgeW91cnNlbGYuCnJvd25hbWVzKHRlc3RfZ2VuZXMpIDwtIGdzdWIocGF0dGVybj0iLjEkIiwgcmVwbGFjZW1lbnQ9IiIsIHg9cm93bmFtZXModGVzdF9nZW5lcykpCnBv
bWJlX2dvaWRzW1siSUQiXV0gPC0gZ3N1YihwYXR0ZXJuPSIuMSQiLCByZXBsYWNlbWVudD0iIiwgeD1wb21iZV9nb2lkc1tbIklEIl1dKQpjcF9yZXN1bHQgPC0gc2ltcGxlX2NsdXN0ZXJwcm9maWxlcihzaWdfZ2VuZXM9dGVzdF9nZW5lcywgZG9fZGF2aWQ9RkFMU0UsIGRvX2dzZWE9RkFMU0UsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlX3RhYmxlPWFsbF9jb21iaW5lZCRkYXRhW1sxXV0sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yZ2RiPW9yZy5TcG9tYmUuZWcuZGIsIG9yZ2RiX3RvPSJBTElBUyIpCmNwX3Jlc3VsdFtbInB2YWx1ZV9wbG90cyJdXVtbImVnb19hbGxfbWYiXV0KIyMgWWF5IGJhciBwbG90cyEKYGBgCgpgYGB7ciB0ZXN0X3RwfQojIyBHZXQgcmlkIG9mIHRob3NlIHN0dXBpZCB0ZXJtaW5hbCAuMXMuCnJvd25hbWVzKHRlc3RfZ2VuZXMpIDwtIGdzdWIocGF0dGVybj0iLjEkIiwgcmVwbGFjZW1lbnQ9IiIsIHg9cm93bmFtZXModGVzdF9nZW5lcykpCnBvbWJlX2dvaWRzW1siSUQiXV0gPC0gZ3N1YihwYXR0ZXJuPSIuMSQiLCByZXBsYWNlbWVudD0iIiwgeD1wb21iZV9nb2lkc1tbIklEIl1dKQp0cF9yZXN1bHQgPC0gc20oc2ltcGxlX3RvcGdvKHNpZ19nZW5lcz10ZXN0X2dlbmVzLCBnb19kYj1wb21iZV9nb2lkcywgcHZhbF9jb2x1bW49ImxpbW1hX2FkanAiKSkKCnRwX3Jlc3VsdFtbInB2YWx1ZV9wbG90cyJdXVtbIm1mcF9wbG90X292ZXIiXV0KdHBfcmVzdWx0W1sicHZhbHVlX3Bsb3RzIl1dW1siYnBwX3Bsb3Rfb3ZlciJdXQpgYGAKCmBgYHtyIGdzdF90ZXN0LCBldmFsPUZBTFNFfQojIyBHZXQgcmlkIG9mIHRob3NlIHN0dXBpZCB0ZXJtaW5hbCAuMXMuCnJvd25hbWVzKHRlc3RfZ2VuZXMpIDwtIGdzdWIocGF0dGVybj0iLjEkIiwgcmVwbGFjZW1lbnQ9IiIsIHg9cm93bmFtZXModGVzdF9nZW5lcykpCnBvbWJlX2dvaWRzW1siSUQiXV0gPC0gZ3N1YihwYXR0ZXJuPSIuMSQiLCByZXBsYWNlbWVudD0iIiwgeD1wb21iZV9nb2lkc1tbIklEIl1dKQojIyB1bml2ZXJzZV9tZXJnZSBpcyB0aGUgY29sdW1uIGluIHRoZSBmaW5hbCBkYXRhIGZyYW1lIHdoZW4uCiMjIGdmZl90eXBlIGlzIHRoZSBmaWVsZCBpbiB0aGUgZ2ZmIGZpbGUgcHJvdmlkaW5nIHRoZSBpZCwgdGhpcyBtYXkgYmUgcmVkdW5kYW50IHdpdGgKIyMgdW5pdmVyc2UgbWVyZ2UsIHRoYXQgaXMgc29tZXRoaW5nIHRvIGNoZWNrIG9uLi4uCmdzdF9yZXN1bHQgPC0gc20oc2ltcGxlX2dvc3RhdHMoc2lnX2dlbmVzPXRlc3RfZ2VuZXMsIGdvX2RiPXBvbWJlX2dvaWRzLCB1bml2ZXJzZV9tZXJnZT0iaWQiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdmZl90eXBlPSJnZW5lIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBnZmY9InBvbWJlLmdmZiIsIHB2YWxfY29sdW1uPSJsaW1tYV9hZGpwIikpCmBgYAoKYGBge3Igc3lzaW5mbywgcmVzdWx0cz0iYXNpcyJ9CnBhbmRlcjo6
cGFuZGVyKHNlc3Npb25JbmZvKCkpCmBgYAo=